libinstall <- function(pkg) {
if(!require(pkg, character.only = T))install.packages(pkg)
library(pkg, character.only = T)
}
libinstall("tidyverse")
libinstall("glue")
libinstall("readr")
libinstall("plotly")
libinstall("readr")
libinstall("readxl")
libinstall("lubridate")
libinstall("curl")
libinstall("epidata")
if(!curl::has_internet())quit()
# Download cpsaat data
tmp <- tempfile()
curl_download("https://www.bls.gov/cps/cpsaat11.xlsx", destfile = tmp)
# Import cpsaat
cpsaat11 <- read_excel(
tmp,
col_names = c(
"Occupation",
"Total",
"Women",
"White",
"Black/African American",
"Asian",
"Hispanic/Latino"
),
na = "–",
col_types = c(
Occupation="text",
Total="numeric",
"Women"="numeric",
"White"="numeric",
"Black/African American"="numeric",
"Asian"="numeric",
"Hispanic/Latino"="numeric"
),
skip = 7
)%>%
drop_na(Occupation)
file.remove(tmp)
## [1] TRUE
rm(tmp)
Get the data at EPI
Labor_force_participation <- epidata::get_labor_force_participation_rate(by = "gr")
Medianaverage_hourly_wages <- epidata::get_median_and_mean_wages(by = "gr")
Minimum_wage <- epidata::get_minimum_wage()
cpsaat11
## # A tibble: 596 x 7
## Occupation Total Women White `Black/African A~ Asian `Hispanic/Latin~
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Total, 16 years ~ 147795 46.8 78 12.1 6.4 17.6
## 2 Management, prof~ 63644 51.7 78.7 9.7 8.6 10.4
## 3 Management, busi~ 27143 44.6 81.7 8.8 6.7 10.9
## 4 Management occup~ 18564 40.4 83.4 8 5.8 10.7
## 5 Chief executives 1669 29.3 88 4.3 5.4 7.4
## 6 General and oper~ 1057 30.5 84.4 7.1 4.5 12.4
## 7 Legislators 25 NA NA NA NA NA
## 8 Advertising and ~ 56 52.1 80.5 14.7 3.9 3.5
## 9 Marketing manage~ 554 60.7 84.1 5.5 7.6 9.9
## 10 Sales managers 521 30.9 87.6 5.8 4.2 7.6
## # ... with 586 more rows
Looks fine.
Labor_force_participation
## # A tibble: 513 x 13
## date all women men black black_women black_men hispanic
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1978-01-01 NA NA NA NA NA NA NA
## 2 1978-02-01 NA NA NA NA NA NA NA
## 3 1978-03-01 NA NA NA NA NA NA NA
## 4 1978-04-01 NA NA NA NA NA NA NA
## 5 1978-05-01 NA NA NA NA NA NA NA
## 6 1978-06-01 NA NA NA NA NA NA NA
## 7 1978-07-01 NA NA NA NA NA NA NA
## 8 1978-08-01 NA NA NA NA NA NA NA
## 9 1978-09-01 NA NA NA NA NA NA NA
## 10 1978-10-01 NA NA NA NA NA NA NA
## # ... with 503 more rows, and 5 more variables: hispanic_women <dbl>,
## # hispanic_men <dbl>, white <dbl>, white_women <dbl>, white_men <dbl>
Participation=Labor_force_participation%>%
pivot_longer(-c(date), names_to = "Race", values_to = "Participation", values_drop_na = T)%>%
separate(Race, into = c("Race", "Gender"))
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 3012 rows [1, 2,
## 3, 4, 7, 10, 13, 14, 15, 16, 19, 22, 25, 26, 27, 28, 31, 34, 37, 38, ...].
Participation=Participation%>%
filter(grepl("women|men", Race, ignore.case = T))%>%
mutate(
Gender=Race,
Race=NA_character_
)%>%
union(
Participation%>%
filter(!grepl("women|men", Race, ignore.case = T))
)
Participation%>%
filter(!is.na(Race))
## # A tibble: 5,020 x 4
## date Race Gender Participation
## <date> <chr> <chr> <dbl>
## 1 1978-12-01 all <NA> 0.634
## 2 1978-12-01 black <NA> 0.617
## 3 1978-12-01 black women 0.535
## 4 1978-12-01 black men 0.718
## 5 1978-12-01 hispanic <NA> 0.633
## 6 1978-12-01 hispanic women 0.47
## 7 1978-12-01 hispanic men 0.812
## 8 1978-12-01 white <NA> 0.635
## 9 1978-12-01 white women 0.499
## 10 1978-12-01 white men 0.785
## # ... with 5,010 more rows
rm(Labor_force_participation)
#adjust for inflation to get to common 2019
Minimum_wage=Minimum_wage%>%
mutate(
Min2019=priceR::adjust_for_inflation(
federal_minimum_wage_real_x_2018_dollars,
2018,
"US",
2019
)
)
## Retrieving countries data
## Generating URL to request all 297 results
## Retrieving inflation data for US
## Generating URL to request all 61 results
Minimum_wage=Minimum_wage%>%
rename(MinCur=federal_minimum_wage_nominal_dollars)%>%
select(Min2019, MinCur, date)
Wages=Wages%>%
rename(
Date=date,
Median=median,
Average=average
)
Participation=Participation%>%
rename(Date=date)
Minimum_wage=Minimum_wage%>%
rename(Date=date)
g=Wages%>%
ggplot(aes(col=Race, x=Date))+
geom_line(aes(y=Average))+
geom_line(aes(y=Min2019, col=NULL), data=Minimum_wage, size=2)+
facet_wrap(~Gender)
ggplotly(g)
g=Wages%>%
ggplot(aes(col=Race, x=Date))+
geom_line(aes(y=Median))+
geom_line(aes(y=Min2019, col=NULL), data=Minimum_wage, size=2)+
facet_wrap(~Gender)
ggplotly(g)
g=Wages%>%
ggplot()+
geom_point(aes(x=Median, y=Average, col=Race, shape=Gender, frame=Date))+
ggtitle("Median vs Average Wage per Race and Gender over Time")
## Warning: Ignoring unknown aesthetics: frame
ggplotly(g)